# 2024 Dev/GlowwormRankPlot.R

#' @title GlowwormRankPlot
#' @description This function generates a plot showing the top enriched cell populations for the target gene list compared to the secondary gene lists. Each population flagged as "Enriched" in the rank scores is drawn as a dot whose height is its rank score (the avgPercent column); populations are ordered by decreasing avgPercent, with ties broken by decreasing avgMeanDiff.
#' @param Input Output from GlowwormScale or GlowwormIntegrate. The function reads the RankScores slot (columns Significant, avgPercent, avgMeanDiff, avgFreq, Combined and Dataset) and the MetaData slot (optional Dataset column).
#' @param ShowTop Either "All" (default), to show every enriched population, or a single number indicating how many of the top ranked enriched populations to show.
#' @param Colors A vector of colors to use for each dataset, or "Default". With "Default" one hue per dataset is used when there is more than one dataset, otherwise grey (#808080). A single supplied color is applied to all dots.
#' @param Frequency A boolean indicating whether the frequency of genes (the avgFreq column) should be indicated by the dot size. Default is TRUE. If FALSE all dots are drawn at the same size.
#' @param Flip A boolean indicating whether the cell populations should be on the x-axis (FALSE) or y-axis (TRUE)
#' @param Legend Show Legend. Default is true.
#' @param RotateXText Angle (in degrees) of the x-axis text. Default is 0, but 45 and 90 are well supported.
#' @param LabelSize Size of the axis and legend text. Default is 11.
#' @return A ggplot object showing the most enriched cell populations. If no population is enriched (or none is left after applying ShowTop) an empty plot is returned.
#' @importFrom ggpubr get_legend as_ggplot ggarrange
#' @importFrom tidyr gather
#' @export
GlowwormRankPlot = function(Input, ShowTop = "All", Colors = "Default", RotateXText = 0, LabelSize = 11, Legend = TRUE, Frequency = TRUE, Flip = FALSE){

  #Set up colors
  #Number of distinct datasets (0 when MetaData has no Dataset column)
  nDatasets = length(unique(Input@MetaData[["Dataset"]]))
  if("Default" %in% Colors){
    #hue_pal() is not defined in this file - presumably scales::hue_pal, made available through the package namespace
    if(nDatasets > 1){Cols = hue_pal()(nDatasets)}else{Cols = "#808080"}
  }else if(is.character(Colors) && length(Colors) > 0){
    if(length(Colors) < nDatasets){stop("Need to provide the same number of colors as datasets , or use 'Default'")}
    Cols = Colors
  }else{
    #Previously this case left Cols undefined and failed later with "object 'Cols' not found"
    stop("Colors must be 'Default' or a character vector of colors")
  }

  #Identify enriched populations, ordered by rank score (ties broken by mean difference)
  TopDeltas = Input@RankScores
  TopDeltas$Significant = factor(TopDeltas$Significant, levels = c("Enriched", ""))
  TopDeltas = data.frame(TopDeltas[order(TopDeltas$Significant, -TopDeltas$avgPercent, -TopDeltas$avgMeanDiff),])
  EnrichedPops = TopDeltas[TopDeltas$Significant %in% "Enriched", , drop = FALSE]
  nEnriched = nrow(EnrichedPops)

  #Print the number of enriched populations
  if(nEnriched == 0){
    EnrichedMsg = "\nNo populations were statistically enriched for the gene list\n"
  }else if(nEnriched == 1){
    EnrichedMsg = "1 population was statistically enriched for the gene list\n"
  }else{
    EnrichedMsg = paste("\n", nEnriched, " populations were statistically enriched for the gene list\n", sep="")
  }
  cat(EnrichedMsg)

  #Select the number of enriched populations to plot
  if(identical(ShowTop, "All")){
    #Keep every enriched population
  }else if(is.numeric(ShowTop) && length(ShowTop) == 1 && !is.na(ShowTop)){
    #is.numeric() accepts integers (e.g. 5L) as well as doubles; the rows are already sorted so head() keeps the top ranked ones
    EnrichedPops = head(EnrichedPops, ShowTop)
    if(ShowTop > nEnriched){cat(paste0("Number of signifcantly enriched populations is less than ", ShowTop, ". ", nEnriched, " signifcantly enriched populations plotted\n"))}
  }else{stop("ShowTop must be 'All', or the number of populations you wish to plot")}

  #Nothing to draw: return an empty plot rather than failing on zero-length aesthetics and max() of an empty vector
  if(nrow(EnrichedPops) == 0){
    return(ggplot() + theme_classic() + xlab("") + ylab("Rank score"))
  }

  #If there is redundancy within the class and subclass naming then use only the subclass name on the plot
  #Combined is split on "|": the text before the first "|" is the subclass, the text after the last "|" is the class
  EnrichedPops$Subclass = gsub("\\|.*", "", EnrichedPops$Combined)
  EnrichedPops$Class = gsub(".*\\|", "", EnrichedPops$Combined)
  EnrichedPops$Class = gsub("CentralNervousSystemMacrophage", "CNS Macro", EnrichedPops$Class)
  #fixed = TRUE so that class names containing regex metacharacters (e.g. "+", "(", ".") are matched literally
  ClassInSubclass = mapply(grepl, EnrichedPops$Class, EnrichedPops$Subclass, MoreArgs = list(fixed = TRUE), USE.NAMES = FALSE)
  EnrichedPops$PlotNames = ifelse(ClassInSubclass, EnrichedPops$Subclass, EnrichedPops$Combined)
  EnrichedPops$PlotNames = ifelse(EnrichedPops$Class == EnrichedPops$Subclass, EnrichedPops$Subclass, EnrichedPops$PlotNames)
  #Replace | with -
  EnrichedPops$PlotNames = gsub("\\|", " - ", EnrichedPops$PlotNames)
  EnrichedPops$Combined = gsub("\\|", " - ", EnrichedPops$Combined)

  #Change population ordering depending on whether plot is horizontal or vertical
  #When flipped the vectors are reversed so the top ranked population ends up at the top of the y-axis
  if(Flip){
    PlotNames = rev(EnrichedPops$PlotNames)
    PDS = rev(EnrichedPops$avgPercent)
    DSet = rev(EnrichedPops$Dataset)
    FreqT = rev(EnrichedPops$avgFreq)
  }else{
    PlotNames = EnrichedPops$PlotNames
    PDS = EnrichedPops$avgPercent
    DSet = EnrichedPops$Dataset
    FreqT = EnrichedPops$avgFreq
  }
  #Position of each population along the axis (seq_len is safe where 1:n is not)
  Positions = seq_len(nrow(EnrichedPops))

  #Set up axis text justification for the requested rotation
  if(RotateXText == 90){SetVjust = 0.5; SetHjust = 1
  }else if(RotateXText == 45){SetVjust = 1; SetHjust = 1
  }else{SetVjust = 0; SetHjust =  0.5
  }

  #Set up initial plot; dot size optionally indicates frequency and dot color indicates dataset when several colors are in use
  FreqPlot = ggplot(EnrichedPops, aes(x = Positions, y = PDS))
  if(Frequency){
    FreqPlot = FreqPlot + aes(size = FreqT)
  }
  if(length(Cols) > 1){
    FreqPlot = FreqPlot + aes(color = DSet) + geom_point(alpha = 0.7)
  }else{
    #A single color is set directly on the points; a manual color scale has no effect without a mapped color aesthetic
    FreqPlot = FreqPlot + geom_point(alpha = 0.7, color = Cols)
  }

  #Update plot aesthetics (the x-axis text now honours RotateXText instead of a fixed 90 degree rotation)
  FreqPlot = FreqPlot +
    theme_classic() +
    theme(axis.text.x = element_text(angle = RotateXText, hjust = SetHjust, vjust = SetVjust, size = LabelSize),
          axis.text.y = element_text(size = LabelSize),
          legend.text = element_text(size = LabelSize),
          axis.title.y = element_text(size = LabelSize),
          legend.title = element_blank()) +
    xlab("") +
    ylab("Rank score") +
    scale_size(limits = c(0, max(EnrichedPops$avgFreq))) +
    scale_color_manual(values = Cols) +
    scale_x_discrete(labels = PlotNames, breaks = Positions, limits = Positions)

  #Toggle legend on or off
  if(Legend == FALSE){
    #NoLegend() is not defined in this file - presumably Seurat::NoLegend; verify against the package imports
    FreqPlot = FreqPlot + NoLegend()
  }
  if(Flip){
    FreqPlot = FreqPlot + coord_flip()
  }

  return(FreqPlot)

}
# Hannahglover/Glowworm documentation built on Jan. 16, 2024, 11:47 p.m.